{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import xgboost as xgb\n",
    "import sys,random\n",
    "import pickle\n",
    "import os\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "#load data\n",
    "train_x_date = pd.read_csv('../../preprocess_data/train_x_date.csv')\n",
    "train_y = pd.read_csv('../../preprocess_data/train_y_33465.csv')\n",
    "train_y['id'] = train_x_date.id\n",
    "train_xy = pd.merge(train_x_date, train_y, on='id',copy=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "group_m = train_xy.groupby(by='loan_month')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "group_m.mean()['label'].to_csv('label-month.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "group_d = train_xy.groupby(by='loan_day')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "group_d.mean()['label'].to_csv('label-day.csv',index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 产生新特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_date = pd.read_csv('../../preprocess_data/train_x_date.csv')\n",
    "unlabel_date = pd.read_csv('../../preprocess_data/unlabel_x_date.csv')\n",
    "valid_date= pd.read_csv('../../preprocess_data/valid_date.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "label_m = pd.read_csv('label-month.csv',header=None).rank()\n",
    "label_m = label_m.values.reshape(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gen_weight(df):\n",
    "    df['mon_w'] = df.apply(lambda row: int(label_m[row['loan_month']-1]),axis =1)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_date = gen_weight(train_date)\n",
    "unlabel_date = gen_weight(unlabel_date)\n",
    "valid_date = gen_weight(valid_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_date.to_csv('../../preprocess_data/train_x_date_w.csv',index=False)\n",
    "unlabel_date.to_csv('../../preprocess_data/unlabel_x_date_w.csv',index=False)\n",
    "valid_date.to_csv('../../preprocess_data/valid_date_w.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
